Base R plots look awful, so don't use them; the plot on the right-hand side is the ggplot version.

It has a legend, and the axes are well labeled.

ggplot will be the function to use all the time for now

2013-2014 came out ggplot,

Besides ggplot2, lattice is another graphics package.

tidy data is very important!!

grammar of graphics (gg)

Start with a dataset; map the features of the graphic that you care about (axes, color) to variables in the data; then choose a geom to display them.

panels … be consistent …

# Publicly available weather-station data, downloaded through the rnoaa
# package: daily precipitation and min/max temperature for three stations,
# covering every day of 2017.
weather_df <-
  rnoaa::meteo_pull_monitors(
    monitors = c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    # Give each station id a readable label.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # Temperatures arrive in tenths of a degree Celsius; divide by 10 to get
    # degrees Celsius.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the identifying columns first, keep everything else after them.
  select(name, id, everything())
## Registered S3 method overwritten by 'crul':
##   method                 from
##   as.character.form_file httr
## Registered S3 method overwritten by 'hoardr':
##   method           from
##   print.cache_info httr
## file path:          /Users/macbook/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated:  2019-09-04 21:33:58
## file min/max dates: 1869-01-01 / 2019-09-30
## file path:          /Users/macbook/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated:  2019-09-04 21:34:09
## file min/max dates: 1965-01-01 / 2019-09-30
## file path:          /Users/macbook/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated:  2019-09-04 21:34:13
## file min/max dates: 1999-09-01 / 2019-09-30
# Print the tibble to check the result (columns: name, id, date, prcp, tmax, tmin).
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

create a ggplot

# Data plus aesthetic mappings only: this sets up the axes but draws no
# observations, because no geom has been added yet.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax))

# Adding a point geom puts the observations on the plot as a scatterplot.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Alternative way of making this plot: pipe the data into ggplot().
# By default the plot is printed.
# Most of the time we will not use this style.
#weather_df %>% filter(name == "CentralPark_NY")
#scatterplot = weather_df %>% 
#  ggplot(aes(x = tmin, y = tmax)) + geom_point()
#scatterplot
#weather_df %>%
#  ggplot(aes(x = tmin, y = tmax)) + 
#  geom_point()

# A ggplot can be stored as an object and extended later; only the data and
# the aesthetic mappings are saved here.
plot_weather <- ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax))

# Add a geom to the saved object; the resulting plot is printed.
plot_weather +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

# Color is mapped to the station name inside geom_point(), so the mapping
# applies to the points only.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name))
## Warning: Removed 15 rows containing missing values (geom_point).

weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5) +
  # se = FALSE drops the grey uncertainty band around the smooth curve.
  # Color is mapped only in geom_point(), so one smooth is fit to all the data.
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

# Facets: split a single plot into one panel per station instead of producing
# several separate plots.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

# tmax over the year, one panel per station. `color = name` sits in the global
# aes(), so both the points and the smooth inherit it.
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  # size = prcp scales each point by that day's precipitation amount
  geom_point(aes(size = prcp), alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

# alpha controls point transparency (0 = fully transparent, 1 = fully opaque).
# Central Park only: convert both temperatures from Celsius to Fahrenheit and
# overlay a straight-line (lm) fit without a confidence band.
weather_df %>%
  filter(name == "CentralPark_NY") %>%
  mutate(
    tmax_fahr = 32 + (9 / 5) * tmax,
    tmin_fahr = 32 + (9 / 5) * tmin
  ) %>%
  ggplot(mapping = aes(x = tmin_fahr, y = tmax_fahr)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)

Why do `aes` positions matter? (With se = FALSE, no confidence intervals are drawn.)

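Defining color in the global `aes()` (alongside x and y) or at the geom level makes the plots look different.

When color is defined at the geom level (inside `geom_point()`), the smooth line has only one color.

When color is defined outside the geom, in the global `aes()`, the smooth lines are colored by name as well (three different smooth lines).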

You can change the graphical features quickly with only small modifications to the code.

some extra stuff

# Smooth curves only (no points): one tmax trend line per station.
weather_df %>%
  ggplot(aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

# Hexagonal binning of tmax against tmin: bin counts are shown instead of
# individual points.
ggplot(data = weather_df, mapping = aes(x = tmax, y = tmin)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

# Setting vs. mapping a color.
# Outside aes(), color = "blue" is a fixed setting: every point is drawn blue.
ggplot(weather_df) + geom_point(aes(x = tmax, y = tmin), color = "blue")
## Warning: Removed 15 rows containing missing values (geom_point).

# Inside aes(), "blue" is treated as data: a constant variable whose only value
# is the string "blue". ggplot gives it the first color of the default discrete
# palette and adds a legend, so the points are not actually blue.
ggplot(weather_df) + geom_point(aes(x = tmax, y = tmin, color = "blue"))
## Warning: Removed 15 rows containing missing values (geom_point).

# Histogram of tmax pooled over all stations, using the default number of bins.
weather_df %>%
  ggplot(aes(x = tmax)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Per-station histograms of tmax: bars for the stations are placed side by side
# (position = "dodge") with a bin width of 2.
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_histogram(binwidth = 2, position = "dodge")
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Overlaid density curves of tmax, one per station. adjust = 0.5 halves the
# default bandwidth (less smoothing); alpha keeps overlapping fills visible.
weather_df %>%
  ggplot(aes(x = tmax, fill = name)) +
  geom_density(adjust = 0.5, alpha = 0.4, color = "blue")
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Boxplots of tmax, one box per station.
weather_df %>%
  ggplot(aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

# Violin plots of tmax by station, with each station's median marked as a
# large point.
ggplot(weather_df, aes(x = name, y = tmax)) + 
  geom_violin(aes(fill = name), color = "blue", alpha = .5) + 
  # `fun` replaces the `fun.y` argument, which was deprecated in ggplot2 3.3.0.
  stat_summary(fun = median, geom = "point", color = "blue", size = 4)
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_summary).

# Ridge plot of tmax: one density per station, stacked along the y axis.
weather_df %>%
  ggplot(aes(x = tmax, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

# Precipitation by station as overlaid, semi-transparent density curves.
weather_df %>%
  ggplot(aes(x = prcp)) +
  geom_density(mapping = aes(fill = name), alpha = 0.5)
## Warning: Removed 3 rows containing non-finite values (stat_density).

# Precipitation by station as a ridge plot.
weather_df %>%
  ggplot(aes(x = prcp, y = name)) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 4.61
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).

# Precipitation by station as boxplots.
weather_df %>%
  ggplot(aes(x = name, y = prcp)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

# Ridge plot of precipitation restricted to days with prcp > 0, so the zero
# values no longer dominate the densities.
ggplot(
  data = filter(weather_df, prcp > 0),
  mapping = aes(x = prcp, y = name)
) +
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 19.7

# Build the scatterplot as an object, then write it to disk as a PDF with
# width 8 and height 5.
weather_plot <-
  weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)

ggsave(
  filename = "weather_plot.pdf",
  plot = weather_plot,
  width = 8,
  height = 5
)
## Warning: Removed 15 rows containing missing values (geom_point).
# Default figure options for knitr chunks: figure width 6, aspect ratio
# (height / width) 0.6, and display at 90% of the output width.
knitr::opts_chunk$set(
  out.width = "90%",
  fig.asp = 0.6,
  fig.width = 6
)

# tmax against tmin, with points colored by station.
weather_df %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name))
## Warning: Removed 15 rows containing missing values (geom_point).

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.